---
title: "Replication Document"
author: "Francesco Bailo"
date: '`r format(Sys.Date(), "%d %B %Y")`'
output: 
  pdf_document:
    toc: true
    toc_depth: 3
---

# Paper details

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE, warning = FALSE)
```

# Packages 

```{r}
library(igraph)
library(truncnorm)
library(openssl)
library(data.table)
library(parallel)
library(knitr)
library(ggplot2)
library(scales)
library(DBI)
library(RSQLite)
library(rjson)
library(geosphere)
library(rgdal)
library(sp)
library(sf)
library(rgeos)
library(dplyr)
library(gridExtra)
library(scales)
library(egg)
library(srvyr)
library(stargazer)
library(reshape2)
library(intergraph)
library(statnet)
library(ergm)
library(rgeos)
library(stringr)
library(texreg)
library(RColorBrewer)
library(knitr)
```


# Functions

```{r}
sqliteGetTable <- function(database, table) {
  require(DBI)
  require(RSQLite)
  con <- dbConnect(RSQLite::SQLite(), dbname = database)
  query <- dbSendQuery(con, paste("SELECT * FROM ", table, ";", sep="")) 
  result <- fetch(query, n = -1)
  dbClearResult(query)
  dbDisconnect(con)
  return(result)
}

addDegreeToVertices <- function (g) {
  require(igraph)
  try(if(!(is.igraph(g))) stop("Not an igraph graph"))
  if (is_directed(g)) {
    V(g)$indegree <- igraph::degree(g, V(g), mode = 'in') 
    V(g)$outdegree <- igraph::degree(g, V(g), mode = 'out') 
  } else {
    try(if('degree' %in% igraph::list.vertex.attributes(g)) 
      stop("Vertex attribure degree already exists"))
    V(g)$degree <- igraph::degree(g, V(g)) 
  }
  return(g)
}

vertexAttributesAsDataFrame <- function(g) {
  try(if(!(is.igraph(g))) stop("Not an igraph graph"))
  vertex_attributes <- igraph::list.vertex.attributes(g)
  df <- data.frame(matrix(NA, nrow = vcount(g), ncol = 0))
  for (a in vertex_attributes) {
    df <- cbind(df, data.frame(igraph::get.vertex.attribute(g, a)))
  }
  names(df) <- igraph::list.vertex.attributes(g)
  return(df)
}
```


# Network and diffusion models

## Network setup functions

```{r, echo = TRUE}
read_chunk('replication_code_01_simulation_functions.R')
```


```{r simulation-functions, eval = TRUE}

```

## Network initialisation

```{r}
set.seed(28100)

g <- randomSocialNetwork(n_communities = 20, g_size = 490, min_comm_size = 15, 
                         within_p = .115, between_p = 0.00005)
g_lo <- layout.fruchterman.reingold(g)

# Node attributes
V(g)$political_distress <- trunctedProp(vcount(g), mean=.5, sd = .2)
V(g)$internal_drive_wtout_internet <- rbinom(vcount(g), 1, p = 0.1) 
V(g)$internal_drive_wt_internet <- rbinom(vcount(g), 1, p = 0.5)  
V(g)$relational_drive <- trunctedProp(vcount(g), mean=.2, sd = .2)

# Edge attributes
E(g)$weight_sc <- trunctedProp(ecount(g), mean=.6, sd = .15)
E(g)$weight_nosc <- trunctedProp(ecount(g), mean=.2, sd = .15)

# Penalise cross-community edges
es <- as_edgelist(g)
es[,1] <- gsub("\\d", "", es[,1]) 
es[,2] <- gsub("\\d", "", es[,2]) 
is_same_comm <- es[,1] == es[,2]

E(g)$weight_nosc[!is_same_comm] <- 
  trunctedProp(sum(!is_same_comm), mean=0, sd = .15)

# Initialise
V(g)$active <- FALSE

# Create high social capital network 
V(g)$internal_drive <- V(g)$internal_drive_wtout_internet
g_wt_sc_net <- addSCNet(g)

# Number of nodes
vcount(g)

# Numebr of Edges
ecount(g)

# Percentage of edges running within a predefined social knot 
el <- as_edgelist(g)
sum(gsub("\\d","", el[,1]) == gsub("\\d","", el[,2])) / nrow(el)

save(g, g_lo, file = "replication_file_01_simulated_net.RData")

```


## Figure 1

```{r}
# Model network
colpal <- c('#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', 
            '#911eb4', '#46f0f0', '#f032e6', '#bcf60c', '#fabebe', 
            '#008080', '#e6beff', '#9a6324', '#fffac8', '#800000', 
            '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', 
            '#ffffff', '#000000')

plot(g, vertex.label = NA, vertex.size = 4, layout = g_lo, 
     vertex.color = colpal[as.numeric(as.factor(V(g)$community))])
```

```{r}
pdf(file = "figure/network-model.pdf")
par(mar=c(0,0,0,0))
plot(g, vertex.label = NA, vertex.size = 4, layout = g_lo, 
     vertex.color = colpal[as.numeric(as.factor(V(g)$community))])
dev.off()
```

## Figure 2

```{r}
plot(g, vertex.label = NA, vertex.size = 4, layout = g_lo, 
     vertex.color = ifelse(V(g)$internal_drive == 1, "black", "white"), 
     edge.width = sqrt(E(g)$weight_nosc)*3.5)

plot(g_wt_sc_net, vertex.label = NA, vertex.size = 4, layout = g_lo, 
     vertex.color = ifelse(V(g)$internal_drive == 1, "black", "white"), 
     edge.width = sqrt(E(g)$weight_sc)*1.5)
```

```{r}
pdf(file = "figure/network-model-low-high-capital.pdf",
    width  = 16, height = 7)
par(mar=c(0,0,0,0), mfrow=c(1,2))
plot(g, vertex.label = NA, vertex.size = 4, layout = g_lo, 
     vertex.color = ifelse(V(g)$internal_drive == 1, "black", "white"), 
     edge.width = sqrt(E(g)$weight_nosc)*3.5)
plot(g_wt_sc_net, vertex.label = NA, vertex.size = 4, layout = g_lo, 
     vertex.color = ifelse(V(g)$internal_drive == 1, "black", "white"), 
     edge.width = sqrt(E(g)$weight_sc)*1.5)
dev.off()
```

## Network simulation

```{r, echo = TRUE}
read_chunk('replication_code_02_simulate_diffusion.R')
```

```{r simulate-diffusion, eval = FALSE}

```

Simulations runs in parallel without any `set.seed()`. To run the simulation execute: 


```{bash, eval = FALSE}
R -e 'setwd("/path/to/directory"); source("replication_code_01_simulation_functions.R"); source("replication_code_02_simulate_diffusion.R")'
```

## Figure 3

```{r}

load("replication_file_02_active_nodes.RData")

active_nodes_df <- 
  rbind(active_nodes_wt_sc_wtout_internet, active_nodes_wtout_sc_wt_internet,
        active_nodes_wtout_sc_wtout_internet, active_nodes_wt_sc_wt_internet)
active_nodes_df$group <- paste0(active_nodes_df$j, "-", active_nodes_df$what)

active_nodes_df <- 
  active_nodes_df %>%
  dplyr::group_by(t, what) %>%
  dplyr::summarize(mean = mean(activation),
                   se = sqrt(var(activation)/length(activation))) 

active_nodes_df$internet <- 
  grepl("wt internet", active_nodes_df$what)
active_nodes_df$internet <- 
  ifelse(active_nodes_df$internet, "Internet", "No Internet")
active_nodes_df$internet <- 
  factor(active_nodes_df$internet, levels = c("No Internet", "Internet"))
active_nodes_df$label <- 
  gsub(" (wt|wtout) internet", "", active_nodes_df$what)
active_nodes_df$label <- 
  gsub("wt sc", "high social capital", active_nodes_df$label)
active_nodes_df$label <- 
  gsub("wtout sc", "low social capital", active_nodes_df$label)

active_nodes_df$`mean and standard error` <- active_nodes_df$label

ggplot(active_nodes_df, aes(x=t, colour = `mean and standard error`)) +
  geom_line(aes(y=mean)) +
  geom_ribbon(aes(ymin = mean - se, ymax = mean + se, fill = `mean and standard error`), 
              colour = NA, alpha = 0.5) +
  facet_wrap(~internet) + 
  scale_y_continuous(label = percent, limits = c(0,1)) + theme_bw() + 
  labs(x='time', y="Activated nodes (%)") + theme(legend.position = 'bottom')
```

```{r}
pdf(file = "figure/network-model-diffusion.pdf",
    width  = 8, height = 5)
ggplot(active_nodes_df, aes(x=t, colour = `mean and standard error`)) +
  geom_line(aes(y=mean)) +
  geom_ribbon(aes(ymin = mean - se, ymax = mean + se, fill = `mean and standard error`), 
              colour = NA, alpha = 0.5) +
  facet_wrap(~internet) + 
  scale_y_continuous(label = percent, limits = c(0,1)) + theme_bw() + 
  labs(x='time', y="Activated nodes (%)") + theme(legend.position = 'bottom') +
  theme(text = element_text(family = "Palatino"))
dev.off()
```


# Meetup

## Meetup datasets

The following chunks are not replicable if you don't have access to `meetup_aug14.sqlite` and `meetup_mar15.sqlite`, which are password protected since they contain users' personal information.

The 2015 dataset was collected to correct the joining date of the 2014 dataset (`meetup_members_2014$joined`, ), which was not correctly parse from the API response. 

```{r}
meetup_members_2014 <- sqliteGetTable('meetup_aug14.sqlite', 'member')
meetup_groups_2014 <- sqliteGetTable('meetup_aug14.sqlite', '[group]')
meetup_events_2014 <- sqliteGetTable('meetup_aug14.sqlite', 'event')
meetup_members_2015 <- sqliteGetTable('meetup_mar15.sqlite', 'member')
meetup_groups_2015 <- sqliteGetTable('meetup_mar15.sqlite', '[group]')
meetup_events_2015 <- sqliteGetTable('meetup_mar15.sqlite', 'event')
```

### Data collection range

```{r}
min(meetup_members_2014$timestamp)
max(meetup_members_2014$timestamp)

min(meetup_members_2015$timestamp)
max(meetup_members_2015$timestamp)
```

### Number of records

```{r}
length(unique(c(meetup_members_2015$member_id, meetup_members_2014$member_id)))
nrow(meetup_members_2014)
nrow(meetup_groups_2014)
nrow(meetup_events_2014)
nrow(meetup_members_2015)
nrow(meetup_groups_2015)
nrow(meetup_events_2015)
```

### Users' joining date

```{r}
meetup_members_2014$joined <- 
  meetup_members_2015$joined[match(meetup_members_2014$member_id, 
                                   meetup_members_2015$member_id)]

sum(!is.na(meetup_members_2014$joined))
min(meetup_members_2014$joined, na.rm = T)
max(meetup_members_2014$joined, na.rm = T)
```

### Number of members indicating a Facebook account for which a joining date is available

```{r}
sum(!is.na(meetup_members_2014$facebook) & !is.na(meetup_members_2014$joined))
```


# Facebook

The following chunks are not fully replicable if you don't have access to ``, which are password protected since they contain users' personal information.

To collect information about existing Facebook relations among Meetup users, I proceeded as following:

1. I scraped public information for each of the `r sum(!is.na(meetup_members_2014$facebook))` Facebook accounts indicated by Meetup members as of August 2014 with the Python script `replication_code_03_scrape_facebook.py`;
2. I scraped public information available on the Facebook "Friends" page of each of the Facebook users identified in the previous step with with the Python script  `replication_code_04_scrape_facebook.py`;
3. I scraped public information available on the Facebook "Friendship" page corresponding to each Facebook relationship identified in the previous step with the Python script `replication_code_05_scrape_facebook.py`.

```{r}
fb_user_timeline <- 
  sqliteGetTable('facebook_user_timeline.sqlite', 'fbUsersTimeline')
fb_friendship_edgelist <- 
  sqliteGetTable('facebook_friendship.sqlite', 'fb_friendship_edgelist')
```

The `fb_user_timeline` dataset contains the Facebook joining date of `r sum(fb_user_timeline$joined != "NA")` Facebook users. The `fb_friendship_edgelist` dataset contains the creation date of `r nrow(fb_friendship_edgelist)` Facebook relationships. 

The data collection process from Facebook can be summarised as following:

1. `r sum(!is.na(meetup_members_2014$facebook))` is the number of Meetup users indicating a Facebook account in August 2014;
2. `r sum(fb_user_timeline$status == 1)` is the number of Facebook accounts found publicly accessible;
3. `r sum(fb_user_timeline$fb_id %in% unique(fb_friendship_edgelist$from))` is the number of Facebook accounts with a publicly accessible "Friends" page;
4. `r sum(fb_user_timeline$fb_id %in% unique(c(fb_friendship_edgelist$from, fb_friendship_edgelist$to)))` is the number of Facebook accounts indicated by Meetup users for which at least one Facebook friendship with another Meetup user is mapped (note: `r sum(fb_user_timeline$fb_id %in% unique(c(fb_friendship_edgelist$from, fb_friendship_edgelist$to))) - sum(fb_user_timeline$fb_id %in% unique(fb_friendship_edgelist$from))` Meetup/Facebook users with no publicly accessible Facebook "Friends" page appeared in their friends' "Friends" pages). 

# Facebook friendship graph of meetup members

```{r, eval = FALSE}
users <- fb_user_timeline
edgelist <- fb_friendship_edgelist

# Finding when user created their Facebook account (exclude all dates before 2007)
whenJoinedFB <- function(json) {
  if (length(json)==1 & json == 'null' | json == 'NA') return(NA)
  # print(json)
  # Tests
  # json1 = '[2013]'
  # json2 = '[2013, 2012, 1974]'
  # json3 = '[2013, 2012, 2009, 1943]'
  # json4 = '[2014, 1943]'
  require(rjson)
  fbActivity <- fromJSON(json)
  joined <- min(fbActivity[fbActivity>=2007])
  while(TRUE) {
    # Check whether there's continuity between supposed joining date and activity
    # (Possible the event in the timeline doesn't correspond to date of joining Facebook)
    if (joined == max(fbActivity)) {
      return(joined)
    } else if (((joined + 1) %in% fbActivity) & (joined %in% fbActivity))  {
      return(joined)
    } else {
      joined = joined + 1
    }
  }
  return(joined)
}

# Manual replace of missing data for 
# V(g)[is.na(V(g)$joined)]
missing_data <- read.csv("replication_file_03_manually_coded.csv", header = FALSE)
users$joined[users$fb_id %in% missing_data$V1] <- missing_data$V2
users$joined_fb <- sapply(users$active_years, whenJoinedFB)

# Checks
unique_id <- unique(c(edgelist$from, edgelist$to))
fb_id <- users$fb_id 
sum(unique_id %in% fb_id)

# Create graph
g <- 
  graph.data.frame(edgelist[,2:8], directed=FALSE, vertices=users[fb_id %in% unique_id,])
vcount(g)
ecount(g)

# Remove edges with error status (generated when requesting the friendship page)
g <- g - E(g)[E(g)$status == 2] 
vcount(g)
ecount(g)

# Remove edges among users who are not friends
g <- g - E(g)[E(g)$friends == 0]
vcount(g)
ecount(g)

# There are 20 multiedges
g <- igraph::simplify(g, edge.attr.comb="first")
g_fship <- g


# Add Meetup joining date (note: collected only in 2015)
meetup_members_tmp <- meetup_members_2015
meetup_members_tmp$facebook <- NULL
meetup_members_tmp <- 
  merge(meetup_members_tmp, 
        meetup_members_2014[,c("member_id","facebook")], 
        all.x = T, all.y = F)

V(g)$joined_mu <- 
  meetup_members_tmp$joined[match(V(g)$name, meetup_members_tmp$facebook)]
V(g)$member_id <- 
  meetup_members_tmp$member_id[match(V(g)$name, meetup_members_tmp$facebook)]

V(g)$lon <- 
  meetup_members_2014$lon[match(V(g)$member_id, meetup_members_2014$member_id)]
V(g)$lat <- 
  meetup_members_2014$lat[match(V(g)$member_id, meetup_members_2014$member_id)]

# Nodes with no joining date details
sum(is.na(V(g)$joined_mu))

## Removing them
g <- g - V(g)[is.na(V(g)$joined_mu)]
```

```{r, eval = FALSE}
# Counting potential recruiting ties
social_recruitment <- function(vindex, g, days) {
  # print(vindex)
  if(is.na(V(g)$joined_fb[vindex])) {
    return(NA)
  } else if(as.Date(V(g)$joined_mu[vindex]) < 
            as.Date(paste0(V(g)$joined_fb[vindex], "-01-01"), 
                    format="%Y-%m-%d", origin='1970-01-01')) {
    return(NA)
  } else {
    n <- 
      sum(as.Date(paste0("01 ", E(g)[from(vindex)]$since), format=" %d %B %Y") + days 
          < as.Date(V(g)$joined_mu[vindex]))
    # n <- vcount(neigh)
    return(n)
  }
}

# Computationally intense
V(g)$recruting_friends_90 <- sapply(1:vcount(g), social_recruitment, g, 90)

save(g, file="replication_file_04_friendship_graph.RData")

```

## Create and save anonymised version

```{r, eval = FALSE}

load("replication_file_04_friendship_graph.RData")

g_anonymised <- g
g_anonymised <- 
  delete_vertex_attr(g_anonymised, "name")
g_anonymised <- 
  delete_vertex_attr(g_anonymised, "member_id")
g_anonymised <- 
  delete_vertex_attr(g_anonymised, "lon")
g_anonymised <- 
  delete_vertex_attr(g_anonymised, "lat")
g_anonymised <- 
  delete_edge_attr(g_anonymised, "grew_up")
g_anonymised <- 
  delete_edge_attr(g_anonymised, "living")


save(g_anonymised, file="replication_file_05_friendship_graph_anonymised.RData")
```


# Recruitment network (based on at least 90-day Facebook relationship)

```{r, eval = TRUE}

load("replication_file_04_friendship_graph.RData")

friendship_edgelist <- as.data.frame(get.edgelist(g))
friendship_edgelist$friends <- E(g)$friends
friendship_edgelist$since <- E(g)$since
friendship_edgelist$grew_up <- E(g)$grew_up
friendship_edgelist$living <- E(g)$living

friendship_edgelist$joined_mu_A <- 
  V(g)$joined_mu[match(friendship_edgelist$V1, V(g)$name)]
friendship_edgelist$joined_mu_B <- 
  V(g)$joined_mu[match(friendship_edgelist$V2, V(g)$name)]
friendship_edgelist$A_joined_first <- 
  friendship_edgelist$joined_mu_A < friendship_edgelist$joined_mu_B


returnV1 <- function(a, b, bool) {if (bool==TRUE) {return(a)} else {return(b)}}
returnV2 <- function(a, b, bool) {if (bool==TRUE) {return(b)} else{return(a)}}

recruitment_edgelist <- data.frame(V1=rep(NA, nrow(friendship_edgelist)),
                                   V2=rep(NA, nrow(friendship_edgelist)),
                                   recruiting=rep(NA, nrow(friendship_edgelist)),
                                   when=rep(NA, nrow(friendship_edgelist)))

recruitment_edgelist$V1[friendship_edgelist$A_joined_first==TRUE] <- 
  as.character(friendship_edgelist$V1[friendship_edgelist$A_joined_first==TRUE])
recruitment_edgelist$V1[friendship_edgelist$A_joined_first==FALSE] <- 
  as.character(friendship_edgelist$V2[friendship_edgelist$A_joined_first==FALSE])

recruitment_edgelist$V2[friendship_edgelist$A_joined_first==TRUE] <- 
  as.character(friendship_edgelist$V2[friendship_edgelist$A_joined_first==TRUE])
recruitment_edgelist$V2[friendship_edgelist$A_joined_first==FALSE] <- 
  as.character(friendship_edgelist$V1[friendship_edgelist$A_joined_first==FALSE])

recruitment_edgelist$when[friendship_edgelist$A_joined_first==TRUE] <- 
  friendship_edgelist$joined_mu_B[friendship_edgelist$A_joined_first==TRUE]
recruitment_edgelist$when[friendship_edgelist$A_joined_first==FALSE] <- 
  friendship_edgelist$joined_mu_A[friendship_edgelist$A_joined_first==FALSE]

recruitment_edgelist$recruiting <-
  (as.Date(paste0("01 ", 
                  friendship_edgelist$since), format=" %d %B %Y") + 90) < 
  as.Date(recruitment_edgelist$when)

recruitment_edgelist <- subset(recruitment_edgelist, recruiting==TRUE)

members <- meetup_members_2014

# Geo
recruitment_edgelist$lonA <- 
  members$lon[match(recruitment_edgelist$V1, members$facebook)]
recruitment_edgelist$latA <- 
  members$lat[match(recruitment_edgelist$V1, members$facebook)]
recruitment_edgelist$lonB <- 
  members$lon[match(recruitment_edgelist$V2, members$facebook)]
recruitment_edgelist$latB <- 
  members$lat[match(recruitment_edgelist$V2, members$facebook)]

require(geosphere)
recruitment_edgelist$distance <- 
  distVincentySphere(
    cbind(recruitment_edgelist$lonA, recruitment_edgelist$latA),
    cbind(recruitment_edgelist$lonB, recruitment_edgelist$latB))

g_recruitment <- 
  graph.data.frame(recruitment_edgelist[,c("V1","V2","when","distance")],
                   directed=TRUE,
                   vertices=unique(data.frame(name = c(recruitment_edgelist$V1,
                                                       recruitment_edgelist$V2),
                                              lon = c(recruitment_edgelist$lonA,
                                                      recruitment_edgelist$lonB),
                                              lat = c(recruitment_edgelist$latA,
                                                      recruitment_edgelist$latB))
                   ))

V(g_recruitment)$joined_mu <- V(g)$joined_mu[match(V(g_recruitment)$name, V(g)$name)]
V(g_recruitment)$joined_fb <- V(g)$joined_fb[match(V(g_recruitment)$name, V(g)$name)]

# Adding vertices removed from the recruitment network (because of no recruiting tie)
missing_vertices <- vertexAttributesAsDataFrame(g)
missing_vertices <- missing_vertices[!missing_vertices$name %in% V(g_recruitment)$name,]
# THIS MUST BE TRUE: nrow(missing_vertices) + vcount(g_recruitment) == vcount(g)
g_recruitment <- 
  g_recruitment %>% 
  add_vertices(nrow(missing_vertices), 
               attr = list(name = as.character(missing_vertices$name),
                           lon = missing_vertices$lon,
                           lat = missing_vertices$lat,
                           joined_mu = as.character(missing_vertices$joined_mu),
                           joined_fb = as.character(missing_vertices$joined_fb)))

# Nodes
vcount(g_recruitment)

# Nodes with at least one recruiting ties
sum(igraph::degree(g_recruitment, mode="in") > 0)
sum(igraph::degree(g_recruitment, mode="in") > 0) / vcount(g_recruitment)

# Nodes that joined Meetup.com after joining Facebook.com
sum(as.Date(V(g_recruitment)$joined_mu) >=
      as.Date(paste0(V(g_recruitment)$joined_fb, "-01-01"), 
              format="%Y-%m-%d", origin='1970-01-01'), na.rm=TRUE)
sum(as.Date(V(g_recruitment)$joined_mu) >=
      as.Date(paste0(V(g_recruitment)$joined_fb, "-01-01"), 
              format="%Y-%m-%d", origin='1970-01-01'), na.rm=TRUE) /
  vcount(g_recruitment)

save(g_recruitment, file = 'replication_file_06_recruitment_graph.RData')

```

## Figure 4

```{r,  fig.width  = 8, fig.height = 3.5}

load("replication_file_06_recruitment_graph.RData")

italy_box_y <- c(36, 47.5)
italy_box_x <- c(6.1, 19.5)

italy_seq_x <- seq(italy_box_x[1], italy_box_x[2], length.out = 51)
italy_seq_y <- seq(italy_box_y[1], italy_box_y[2], length.out = 51)

which_g <- 
  V(g)$lon >= italy_box_x[1] & V(g)$lon <= italy_box_x[2] &
  V(g)$lat >= italy_box_y[1] & V(g)$lat <= italy_box_y[2]
lon_g <- V(g)$lon[which_g]
lat_g <- V(g)$lat[which_g]

which_mu <- 
  meetup_members_2014$lon >= 
  italy_box_x[1] & meetup_members_2014$lon <= italy_box_x[2] &
  meetup_members_2014$lat >= 
  italy_box_y[1] & meetup_members_2014$lat <= italy_box_y[2]
lon_mu <- meetup_members_2014$lon[which_mu]
lat_mu <- meetup_members_2014$lat[which_mu]

lon_mu <- 
  cut(lon_mu, breaks = italy_seq_x, labels = italy_seq_x[1:50])
lat_mu <- 
  cut(lat_mu, breaks = italy_seq_y, labels = italy_seq_y[1:50])

lon_g <- 
  cut(lon_g, breaks = italy_seq_x, labels = italy_seq_x[1:50])
lat_g <- 
  cut(lat_g, breaks = italy_seq_y, labels = italy_seq_y[1:50])

g_lonlat_df <- 
  data.frame(lon_g, lat_g) %>% 
  group_by(lon_g, lat_g) %>% 
  summarize(n = n())
mu_lonlat_df <- 
  data.frame(lon_mu, lat_mu) %>% 
  group_by(lon_mu, lat_mu) %>% 
  summarize(n = n())

g_mu_lonlat_df <- 
  merge(g_lonlat_df, mu_lonlat_df, 
        by.x = c('lon_g','lat_g'), 
        by.y = c('lon_mu','lat_mu'))

g_mu_lonlat_df$n_perc.x <- 
  g_mu_lonlat_df$n.x / sum(g_mu_lonlat_df$n.x)
g_mu_lonlat_df$n_perc.y <- 
  g_mu_lonlat_df$n.y / sum(g_mu_lonlat_df$n.y) 

g_mu_lonlat_df$diff <- with(g_mu_lonlat_df, n_perc.x - n_perc.y)

grid.arrange(
  ggplot(g_lonlat_df, 
         aes(x = as.numeric(lon_g), y = as.numeric(lat_g), 
             fill = sqrt(n))) + geom_tile() +
    scale_fill_gradient(low = '#edf8b1', high = '#2c7fb8', na.value = 'white') + 
    theme_bw() +
    labs(x=NULL, y=NULL, title = "Network members (n=8132)") + 
    theme(axis.title=element_blank(),
          axis.text=element_blank(),
          axis.ticks=element_blank()),
  
  ggplot(mu_lonlat_df, 
         aes(x = as.numeric(lon_mu), 
             y = as.numeric(lat_mu),
             fill = sqrt(n))) + 
    geom_tile() +
    scale_fill_gradient(low = '#edf8b1', high = '#2c7fb8', na.value = 'white') + 
    theme_bw() +
    labs(x=NULL, y=NULL, title = "Meetup members (n=97,808)") +
    theme(axis.title=element_blank(),
          axis.text=element_blank(),
          axis.ticks=element_blank()),
  ncol = 2)

pdf(file = "figure/heatmap-dist-mu-g.pdf",
    width  = 8, height = 3.5)
grid.arrange(
  ggplot(g_lonlat_df, 
         aes(x = as.numeric(lon_g), 
             y = as.numeric(lat_g), 
             fill = sqrt(n))) + 
    geom_tile() +
    scale_fill_gradient(low = '#edf8b1', high = '#2c7fb8', na.value = 'white') + 
    theme_bw() +
    labs(x=NULL, y=NULL, title = "Network members (n=8132)") +
    theme(axis.title=element_blank(),
          axis.text=element_blank(),
          axis.ticks=element_blank(),
          text = element_text(family = "Palatino")),
  
  ggplot(mu_lonlat_df, 
         aes(x = as.numeric(lon_mu), 
             y = as.numeric(lat_mu), 
             fill = sqrt(n))) + 
    geom_tile() +
    scale_fill_gradient(low = '#edf8b1', high = '#2c7fb8', na.value = 'white') + 
    theme_bw() +
    labs(x=NULL, y=NULL, title = "Meetup members (n=97,808)") +
    theme(axis.title=element_blank(),
          axis.text=element_blank(),
          axis.ticks=element_blank(),
          text = element_text(family = "Palatino")),
  ncol = 2)
dev.off()
```

## Figure 5

```{r, fig.width  = 20, fig.height = 10}

member15 <- meetup_members_2015
member15$joined <- as.Date(member15$joined)

g_el <- as_edgelist(g)
g_el_lonlat_df <- data.frame(lon.x = V(g)$lon[match(g_el[,1], V(g)$name)],
                             lat.x = V(g)$lat[match(g_el[,1], V(g)$name)],
                             lon.y = V(g)$lon[match(g_el[,2], V(g)$name)],
                             lat.y = V(g)$lat[match(g_el[,2], V(g)$name)])

g_el_lonlat_df$dist <- distVincentySphere(g_el_lonlat_df[,1:2], g_el_lonlat_df[,3:4])
E(g)$distance <- g_el_lonlat_df$dist

sequence <- seq(from = min(member15$joined), max(member15$joined), by = 30)
ts_meetup_df <- as.data.frame(table(cut(member15$joined, breaks = sequence, 
                                        labels = sequence[2:length(sequence)])))
ts_meetup_df$Prop <- ts_meetup_df$Freq / sum(ts_meetup_df$Freq)
ts_meetup_df$cumsum <- cumsum(ts_meetup_df$Prop)
ts_meetup_df$what <- "Meetup"

ts_g_df <- as.data.frame(table(cut(as.Date(V(g)$joined_mu), breaks = sequence, 
                                   labels = sequence[2:length(sequence)])))

ts_g_df$Prop <- ts_g_df$Freq / sum(ts_g_df$Freq) 
ts_g_df$cumsum <- cumsum(ts_g_df$Prop)
ts_g_df$what <- "undirected social network"

ts_df <- rbind(ts_meetup_df, ts_g_df)
ts_diff_df <- merge(ts_meetup_df, ts_g_df, by = "Var1")
ts_diff_df$diff <- ts_diff_df$Prop.y - ts_diff_df$Prop.x

p1 <-
  ggplot(ts_df, aes(x=as.Date(Var1), y=Prop, colour=what)) +
  geom_line(size = 2) + scale_y_continuous(label = percent) + theme_bw() + 
  labs(y=NULL, colour=NULL, x="Distribution of joining date in time (%)") +
  theme(legend.position = 'bottom', text = element_text(size=30))

p2 <-
  ggplot(ts_diff_df, aes(x=as.Date(Var1), y=diff*100)) +
  geom_line(size = 2) + theme_bw() + labs(y=NULL, x="Difference (% points)") +
  theme(text = element_text(size=30))

ggarrange(p1, p2, ncol = 1)
```

```{r}
p1 <-
  ggplot(ts_df, aes(x=as.Date(Var1), y=Prop, colour=what)) +
  geom_line(size = 2) + scale_y_continuous(label = percent) + theme_bw() + 
  labs(y=NULL, colour=NULL, x="Distribution of joining date in time (%)") +
  theme(legend.position = 'bottom',
        text = element_text(family = "Palatino"))

p2 <-
  ggplot(ts_diff_df, aes(x=as.Date(Var1), y=diff*100)) +
  geom_line(size = 2) + theme_bw() + labs(y=NULL, x="Difference (% points)") +
  theme(text = element_text(family = "Palatino"))

pdf(file = "figure/ts-mu-g-joining-date.pdf",
    width  = 10, height = 5)
ggarrange(p1, p2, ncol = 1)
dev.off()
```


# Data analysis

## Spatial polygons of Italian regions

Shapefiles are produced by Istat and also available here: https://www.istat.it/it/archivio/124086

```{r}
region_2013.sp <- readOGR('.', 'Reg01012013_g_WGS84')
region_2013.sp <- 
  spTransform(region_2013.sp, 
              "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
region_2013.sp$COD_REG <- as.numeric(as.character(region_2013.sp$COD_REG))
```


## ISTAT Survey

The following chunks are not replicable if you don't have access to `DF_AVQ_A2013.RData`, which is password protected in this repository (as third party data). You can get download the data package from the Istat website at this page: www.istat.it/en/archivio/129959 (accessed October 2018).  

```{r}
load("DF_AVQ_A2013.RData")

asIndexComp <- function(x) {
  x <- as.numeric(x)
  x[is.na(x)] <- 0
  x
}

reverse <- function(x) {
  if (is.na(x)) {
    res <- NA 
  } else if (x == 1) {
    res <- 3
  } else if (x == 2) {
    res <- 2
  } else if (x == 3) {
    res <- 1
  } else if (x == 4) {
    res <- 0
  } else {
    res <- NA
  }
  return(res)
}

nrow(DF_AVQ_A2013)

# Demographics
DF_AVQ_A2013$male <- DF_AVQ_A2013$sesso == 1
DF_AVQ_A2013$age <- rescale(DF_AVQ_A2013$eta, from = c(1,18), to = c(0,1))
DF_AVQ_A2013$married <- DF_AVQ_A2013$stciv == 2
DF_AVQ_A2013$unemployed <-  DF_AVQ_A2013$cond %in% 2:3
DF_AVQ_A2013$income <- as.factor(DF_AVQ_A2013$redpr)
levels(DF_AVQ_A2013$income) <- c('emp','selfemp', 'pens', 'soc', 'rev', 'fam')
DF_AVQ_A2013$tv_hours <- DF_AVQ_A2013$hhtel
DF_AVQ_A2013$tv_hours[DF_AVQ_A2013$tv_hours == 99] <- NA
DF_AVQ_A2013$edu <- NA
DF_AVQ_A2013$edu[DF_AVQ_A2013$istr %in% 10:11] <- 1
DF_AVQ_A2013$edu[DF_AVQ_A2013$istr==9] <- 2
DF_AVQ_A2013$edu[DF_AVQ_A2013$istr==7] <- 3
DF_AVQ_A2013$edu[DF_AVQ_A2013$istr %in% 1:2] <- 4

# Geography
region_labels <- 
  c("Piemonte", "Valle d'Aosta/Vallée d'Aoste", "Lombardia", "Bolzano", "Trento", "Veneto", 
    "Friuli-Venezia Giulia", "Liguria", "Emilia-Romagna", "Toscana", "Umbria", "Marche", 
    "Lazio", "Abruzzo", "Molise", "Campania", "Puglia", "Basilicata", "Calabria", "Sicilia", 
    "Sardegna", "Trentino-Alto Adige/Südtirol")
names(region_labels) <- 
  c("010", "020", "030", "041", "042", "050", 
    "060", "070", "080", "090", 
    "100", "110", "120", "130", 
    "140", "150", "160", "170", 
    "180", "190", "200", "040")
DF_AVQ_A2013$reg[DF_AVQ_A2013$reg %in% c(41,42)] <- 040
DF_AVQ_A2013$COD_REG <- as.numeric(gsub("0$", "", DF_AVQ_A2013$reg))
DF_AVQ_A2013$DEN_REG <-  
  region_labels[match(sprintf("%03d", DF_AVQ_A2013$reg), names(region_labels))]
DF_AVQ_A2013$DEN_REG <- 
  factor(DF_AVQ_A2013$DEN_REG,
         levels = region_labels[order(names(region_labels))])
DF_AVQ_A2013$north <- 
  (!DF_AVQ_A2013$DEN_REG %in% 
     c("Abruzzo", "Puglia", "Basilicata", 
       "Campania", "Calabria", "Molise", "Sicilia", "Sardegna"))
DF_AVQ_A2013$north[is.na(DF_AVQ_A2013$DEN_REG)] <- NA

DF_AVQ_A2013$north_south <- ifelse(DF_AVQ_A2013$north, "North", "South")
prop.table(table(DF_AVQ_A2013$north_south))

# Social capital: networking
## `sc_net` without political participation
DF_AVQ_A2013$sc_net <-
  as.logical(as.numeric(with(DF_AVQ_A2013,
                             asIndexComp(volon==4) + 
                               asIndexComp(psind == 4) + 
                               asIndexComp(pgrvo == 6) + 
                               asIndexComp(paeco == 2) + 
                               asIndexComp(pcult == 4) + 
                               asIndexComp(paspro == 6) > 0)))
summary(DF_AVQ_A2013$sc_net)
prop.table(table(DF_AVQ_A2013$sc_net))

## Political participation (active)
DF_AVQ_A2013$pol_part_act <- 
  as.numeric(with(DF_AVQ_A2013, 
                  asIndexComp(ppapo == 2) + 
                    asIndexComp(volpa == 8))) > 0
summary(DF_AVQ_A2013$pol_part_act)
prop.table(table(DF_AVQ_A2013$pol_part_act))

## `sc_net` now includes also political participation
DF_AVQ_A2013$sc_net <- 
  DF_AVQ_A2013$sc_net == TRUE | DF_AVQ_A2013$pol_part_act == TRUE
summary(DF_AVQ_A2013$sc_net)
prop.table(table(DF_AVQ_A2013$sc_net))

# `coemicro` is the weighting variable for regional-level averages
DF_AVQ_A2013$coemicro <- 
  as.numeric(as.character(DF_AVQ_A2013$coemicro))

# Social capital: Trust.
DF_AVQ_A2013$sc_trust <-
  rescale(
    with(DF_AVQ_A2013,sapply(fidu3, reverse)),
    to = c(0, 1),
    from = c(0, 3))
summary(DF_AVQ_A2013$sc_trust)

# Logistic regression
log_sc_net <- 
  glm(sc_net ~ age + male + married + edu + unemployed + tv_hours + income + north_south,
      data = DF_AVQ_A2013,
      family = 'binomial')

lm_sc_trust <- 
  glm(sc_trust ~ sc_net + age + male + married + edu + unemployed + tv_hours + income + north_south,
      data = DF_AVQ_A2013,
      family = 'binomial')

# Survey responses averaged at regional level
survey_reg_avg <- 
  srvyr::as_survey_design(DF_AVQ_A2013, strata = COD_REG, weights = coemicro)

survey_reg_avg <-
  survey_reg_avg %>%
  group_by(COD_REG) %>%
  summarize_at(vars(sc_net, sc_trust, pol_part_act), 
               survey_mean, 
               vartype = "ci", 
               na.rm = TRUE)

survey_reg_avg$DEN_REG <- 
  region_2013.sp@data$DEN_REG[match(survey_reg_avg$COD_REG, 
                                    region_2013.sp@data$COD_REG)]
survey_reg_avg$DEN_REG <- gsub("/(.*)$", "", survey_reg_avg$DEN_REG)
survey_reg_avg$DEN_REG <- factor(survey_reg_avg$DEN_REG, levels = rev(survey_reg_avg$DEN_REG))

ggplot(survey_reg_avg) +
  geom_point(aes(y=sc_net, x=DEN_REG)) + 
  geom_errorbar(aes(ymin = sc_net_low, ymax = sc_net_upp, x=DEN_REG)) + 
  coord_flip() +
  theme_bw()

ggplot(survey_reg_avg) +
  geom_point(aes(y=sc_trust, x=DEN_REG)) + 
  geom_errorbar(aes(ymin = sc_trust_low, ymax = sc_trust_upp, x=DEN_REG)) + 
  coord_flip() +
  theme_bw()

ggplot(survey_reg_avg) +
  geom_point(aes(y=pol_part_act, x=DEN_REG)) + 
  geom_errorbar(aes(ymin = pol_part_act_low, ymax = pol_part_act_upp, x=DEN_REG)) + 
  coord_flip() +
  theme_bw()

survey_reg_avg$north_south <- ifelse(survey_reg_avg$COD_REG < 13, 'North', 'South')

```

### Figure 6

```{r, fig.width = 5, fig.height = 4}

ggplot(survey_reg_avg) +
  geom_point(aes(x=sc_net, y=sc_trust, colour = north_south )) + 
  geom_errorbar(aes(x=sc_net, ymin=sc_trust_low, ymax=sc_trust_upp, colour = north_south), 
                width = 0) +
  geom_errorbarh(aes(y=sc_trust, xmin=sc_net_low, xmax=sc_net_upp, colour = north_south)) +
  theme_bw() + 
  labs(colour=NULL)

ggsave(filename = "figure/sc-ita-regions.pdf", dpi = 300, 
       width = 5, height = 4,
       ggplot(survey_reg_avg) +
         geom_point(aes(x=sc_net, y=sc_trust, colour = north_south )) + 
         geom_errorbar(aes(x=sc_net, ymin=sc_trust_low, ymax=sc_trust_upp, 
                           colour = north_south), 
                       width = 0) +
         geom_errorbarh(aes(y=sc_trust, xmin=sc_net_low, xmax=sc_net_upp, 
                            colour = north_south)) +
         theme_bw() + labs(colour=NULL))
```

### Figure 12

```{r}
region_2013.sf <- st_as_sf(region_2013.sp)

region_2013.sf$sc_trust <- 
  survey_reg_avg$sc_trust[match(region_2013.sf$COD_REG, survey_reg_avg$COD_REG)]
region_2013.sf$sc_net <- 
  survey_reg_avg$sc_net[match(region_2013.sf$COD_REG, survey_reg_avg$COD_REG)]

grid.arrange(
  ggplot(region_2013.sf) + geom_sf(aes(fill = sc_net), colour = 'black', size = .05) + 
    scale_fill_gradient(low = '#ffeda0', high = '#f03b20', label = percent) + theme_bw() + 
    theme(legend.position = 'bottom', legend.key.width=unit(2,"cm")) ,
  ggplot(region_2013.sf) + geom_sf(aes(fill = sc_trust), colour = 'black', size = .05) + 
    scale_fill_gradient(low = '#ffeda0', high = '#2c7fb8')  + theme_bw() + 
    theme(legend.position = 'bottom', legend.key.width=unit(1,"cm")),
  ncol=2)

```

```{r}
pdf(file = "figure/map-ita-reg-sc.pdf", width  = 9, height = 4.5)
grid.arrange(
  ggplot(region_2013.sf) + geom_sf(aes(fill = sc_net), colour = 'black', size = .05) + 
    scale_fill_gradient(low = '#ffeda0', high = '#f03b20', label = percent) + theme_bw() + 
    theme(legend.position = 'bottom', legend.key.width=unit(1.4,"cm"),
          text = element_text(family = "Palatino")) ,
  ggplot(region_2013.sf) + geom_sf(aes(fill = sc_trust), colour = 'black', size = .05) + 
    scale_fill_gradient(low = '#ffeda0', high = '#2c7fb8')  + theme_bw() + 
    theme(legend.position = 'bottom', legend.key.width=unit(1.4,"cm"), 
          text = element_text(family = "Palatino")),
  ncol=2)
```

### Table 2

```{r, results='asis'}
stargazer(log_sc_net, lm_sc_trust)
```

```{r, results='hide'}
stargazer(log_sc_net, lm_sc_trust,
          type = 'latex', out = 'table/sc-net-survey-mod.tex',
          report = "vc*",
          float = FALSE,
          style = 'ajps')
stargazer(log_sc_net, lm_sc_trust,
          type = 'html', out = 'table/sc-net-survey-mod.html',
          report = "vc*",
          float = FALSE,
          style = 'ajps')
```

### Logistic regression prediction: MALE

```{r}
pr <-
  predict(log_sc_net, 
          newdata = data.frame(age = 0.6470588,
                               male = TRUE,
                               married = FALSE,
                               edu = 2,
                               unemployed = FALSE,
                               tv_hours = 2,
                               income = 'emp',
                               north_south = c('North', 'South')), 
          type="response", se.fit = TRUE)
pr
```

```{r}
pr_df <- data.frame(where = c('north','south'),
                    fit = pr$fit,
                    se.fit = pr$se.fit)
ggplot(pr_df, aes(x = where, y = fit)) +
  geom_point() +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit)) + 
  theme_bw() + 
  labs(x = NULL, y = "Probability respondent is connected")
```

### Logistic regression prediction: FEMALE

```{r}
pr <-
  predict(log_sc_net, 
          newdata = data.frame(age = 0.6470588,
                               male = FALSE,
                               married = FALSE,
                               edu = 2,
                               unemployed = FALSE,
                               tv_hours = 2,
                               income = 'emp',
                               north_south = c('North', 'South')), 
          type="response", se.fit = TRUE)
pr
```

```{r}
pr_df <- data.frame(where = c('north','south'),
                    fit = pr$fit,
                    se.fit = pr$se.fit)
ggplot(pr_df, aes(x = where, y = fit)) +
  geom_point() +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit)) + 
  theme_bw() + 
  labs(x = NULL, y = "Probability respondent is connected")
```


## Diffusion

### Create geographic grid

The geographic grid is a simplified version of the GEOSTAT 2011. The file is also available from https://www.istat.it/it/archivio/155162.

```{r eval = FALSE}
# See https://www.istat.it/it/archivio/155162
geostat_grid <- readOGR(".",'GEOSTAT_grid_POP_1K_IT_2011')

geostat_grid_simp.sf <- st_as_sf(geostat_grid)
geostat_grid_simp.sf <- geostat_grid_simp.sf %>% filter(POP > 0)
geostat_grid_simp.sf$GRID_ID_simp <- gsub("\\dE", "E", geostat_grid_simp.sf$GRID_ID)
geostat_grid_simp.sf$GRID_ID_simp <- gsub("\\d$", "", geostat_grid_simp.sf$GRID_ID_simp)

geostat_grid_simp.sf <- 
  geostat_grid_simp.sf %>% 
  group_by(GRID_ID_simp) %>% 
  summarize(pop = sum(POP))

geostat_grid_simp.sf <- 
  geostat_grid_simp.sf %>% 
  filter(pop > 0)

save(geostat_grid_simp.sf, file = 'replication_file_07_geostat_grid.RData')
```

```{r, cache = TRUE}
load('replication_file_07_geostat_grid.RData')
ggplot(geostat_grid_simp.sf) + 
  geom_sf(aes(fill=pop), color = 'grey', size = 0.05) + 
  scale_fill_gradient(low = "white", high = "red") + 
  theme_bw()
```

```{r}
# Number of cells
nrow(geostat_grid_simp.sf)

# Mean area
mean(st_area(geostat_grid_simp.sf))
```


### Number of Meetup members by grid cells

To replicate the following chunks: 

```{bash, eval = FALSE}
R -e 'setwd("/path/to/directory"); 
source("replication_code_06_measure_geographic_diffusion.R");'
```

```{r, echo = TRUE}
read_chunk('replication_code_06_measure_geographic_diffusion.R')
```


```{r measure-geographic-diffusion, eval = FALSE}

```

### Figure 7

```{r, fig.width = 8, fig.height = 4}
load("replication_file_08_geographic_diffusion.RData")

# Average number of residents for each Meetup member
1 / 
  mean(members_pop_grid_dt$members[
    members_pop_grid_dt$date == max(members_pop_grid_dt$date) &
      members_pop_grid_dt$pop > 0] /
      members_pop_grid_dt$pop[
        members_pop_grid_dt$date == max(members_pop_grid_dt$date) &
          members_pop_grid_dt$pop > 0])

# Calculate density from entire meetup dataset
meetup_members <- meetup_members_2015
meetup_members <- meetup_members[meetup_members$country %in% c("it"),]
meetup_members$joined <- as.Date(meetup_members$joined)
meetup_members.sp <- 
  SpatialPointsDataFrame(meetup_members[,c('lon','lat')],  
                         data = 
                           meetup_members,
                         proj4string = 
                           CRS('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'))

geostat_grid.sp <- as(geostat_grid_simp.sf,'Spatial')
geostat_grid.sp <- 
  spTransform(geostat_grid.sp, 
              '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +towgs84=0,0,0')
res <- over(geostat_grid.sp, region_2013.sp)
geostat_grid.sp$COD_REG <- res$COD_REG
geostat_grid.sp$DEN_REG <- as.character(res$DEN_REG)

ita_popgrid <- geostat_grid.sp[,c('GRID_ID_simp','pop','COD_REG')]
ita_cells <- ita_popgrid$GRID_ID_simp[!is.na(ita_popgrid$COD_REG)]
north_cells <- ita_popgrid$GRID_ID_simp[ita_popgrid$COD_REG < 13]
south_cells <- ita_popgrid$GRID_ID_simp[ita_popgrid$COD_REG >= 13]
ita_pop <- sum(ita_popgrid$pop)
north_pop <- sum(ita_popgrid$pop[ita_popgrid$COD_REG < 13], na.rm = T)
south_pop <- sum(ita_popgrid$pop[ita_popgrid$COD_REG >= 13], na.rm = T)

# A cell is activated if a person every 2000 is a member (1*0.0005)
stats_dt <- 
  members_pop_grid_dt[!is.na(members_pop_grid_dt$COD_REG)] %>% 
  group_by(date) %>% 
  summarize(ita_pop = sum(pop[GRID_ID_simp %in% ita_cells & members > pop*0.0005]) / 
              ita_pop,
            north_pop = sum(pop[GRID_ID_simp %in% north_cells & members > pop*0.0005]) / 
              north_pop,
            south_pop = sum(pop[GRID_ID_simp %in% south_cells & members > pop*0.0005]) / 
              south_pop)

stats_melted_dt <- melt(stats_dt, id.vars = c('date'))
stats_melted_dt <- stats_melted_dt[stats_melted_dt$variable != 'ita_pop',]
stats_melted_dt$what <- ifelse(stats_melted_dt$variable == "north_pop", "North", "South")

ggplot(stats_melted_dt, aes(x=date, y=value, colour = what)) + 
  geom_line() + 
  scale_y_continuous(label = percent) + 
  labs(x="Territorial diffusion (as % of population)", y=NULL, colour=NULL) +
  geom_vline(xintercept = as.Date('2007-06-14'), 
             size = .3, linetype = 2, alpha  = .2) + 
  geom_vline(xintercept = as.Date('2007-09-08'), 
             size = .3, linetype = 2, alpha  = .2) + 
  geom_vline(xintercept = as.Date('2012-10-28'), 
             size = .3, linetype = 2, alpha  = .2) + 
  annotate("text", x = as.Date("2006-06-01"), 
           y = 0.15, label = "V-Day\nis announced", 
           size = 2.5) +
  geom_segment(aes(x=as.Date("2006-06-10"), xend=as.Date('2007-06-01'), 
                   y=0.12, yend=0.09), 
               size = .3, arrow = arrow(length = unit(0.2, "cm")), 
               colour = 'black') +
  annotate("text", x = as.Date("2008-08-31"), y = 0.05, 
           label = "V-Day\ntakes place", 
           size = 2.5) +
  geom_segment(aes(x=as.Date("2008-07-15"), xend=as.Date('2007-09-15'), 
                   y=0.05, yend=0.045), 
               size = .3, arrow = arrow(length = unit(0.2, "cm")), 
               colour = 'black') +
  annotate("text", x = as.Date("2012-01-31"), y = 0.45, 
           label = "Sicilian\nelection", 
           size = 2.5) +
  geom_segment(aes(x=as.Date("2012-01-31"), xend=as.Date('2012-10-10'), 
                   y=0.42, yend=0.25), 
               size = .3, arrow = arrow(length = unit(0.2, "cm")), colour = 'black') +
  theme_bw() + theme(legend.position = c(0.9, 0.2))

```

```{r}
ggsave(filename = "figure/ts-meetup-diffusion.pdf", dpi = 300, width = 8, height = 4,
       ggplot(stats_melted_dt, aes(x=date, y=value, colour = what)) + 
         geom_line() + 
         scale_y_continuous(label = percent) + 
         labs(x="Territorial diffusion (as % of population)", y=NULL, colour=NULL) +
         geom_vline(xintercept = as.Date('2007-06-14'), 
                    size = .3, linetype = 2, alpha  = .2) + 
         geom_vline(xintercept = as.Date('2007-09-08'), 
                    size = .3, linetype = 2, alpha  = .2) + 
         geom_vline(xintercept = as.Date('2012-10-28'), 
                    size = .3, linetype = 2, alpha  = .2) + 
         annotate("text", x = as.Date("2006-06-01"), 
                  y = 0.15, label = "V-Day\nis announced", 
                  size = 3) +
         geom_segment(aes(x=as.Date("2006-06-10"), xend=as.Date('2007-06-01'), 
                          y=0.12, yend=0.09), 
                      size = .3, arrow = arrow(length = unit(0.2, "cm")), 
                      colour = 'black') +
         annotate("text", x = as.Date("2008-08-31"), y = 0.05, 
                  label = "V-Day\ntakes place", 
                  size = 3) +
         geom_segment(aes(x=as.Date("2008-07-15"), xend=as.Date('2007-09-15'), 
                          y=0.05, yend=0.045), 
                      size = .3, arrow = arrow(length = unit(0.2, "cm")), 
                      colour = 'black') +
         annotate("text", x = as.Date("2012-01-31"), y = 0.45, 
                  label = "Sicilian\nelection", 
                  size = 3) +
         geom_segment(aes(x=as.Date("2012-01-31"), xend=as.Date('2012-10-10'), 
                          y=0.42, yend=0.25), 
                      size = .3, arrow = arrow(length = unit(0.2, "cm")), 
                      colour = 'black') +
         theme_bw() + theme(legend.position = c(0.9, 0.2))
)
```

### Figure 11

```{r}
geostat_grid.sf <- geostat_grid_simp.sf

ita_grid_plot1 <- members_pop_grid_dt[date == '2007-09-15']
ita_grid_plot1 <- merge(geostat_grid.sf,ita_grid_plot1, by = 'GRID_ID_simp')
ita_grid_plot1$act <- ita_grid_plot1$members > (ita_grid_plot1$pop.x*0.0005)

ita_grid_plot2 <- members_pop_grid_dt[date == '2012-11-03']
ita_grid_plot2 <- merge(geostat_grid.sf, ita_grid_plot2, by = 'GRID_ID_simp')
ita_grid_plot2$act <- ita_grid_plot2$members > (ita_grid_plot2$pop.x*0.0005)

ita_grid_plot3 <- members_pop_grid_dt[date == '2013-03-02']
ita_grid_plot3 <- merge(geostat_grid.sf,ita_grid_plot3, by = 'GRID_ID_simp')
ita_grid_plot3$act <- ita_grid_plot3$members > (ita_grid_plot3$pop.x*0.0005)

ita_grid_plot4 <- members_pop_grid_dt[date == '2015-02-28']
ita_grid_plot4 <- merge(geostat_grid.sf,ita_grid_plot4, by = 'GRID_ID_simp')
ita_grid_plot4$act <- ita_grid_plot4$members > (ita_grid_plot4$pop.x*0.0005)

geostat_grid.sf$cut <- 
  cut(geostat_grid.sf$pop, 
      breaks = quantile(geostat_grid.sf$pop, probs = seq(0, 1, 0.1)),
      include.lowest = TRUE)

grid.arrange(
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .05, 
            fill = 'white') +
    labs(caption = '2007-09-15') + 
    geom_sf(data=ita_grid_plot1[ita_grid_plot1$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank()),
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .05, 
            fill = 'white') +
    labs(caption = '2012-11-03') + 
    geom_sf(data=ita_grid_plot2[ita_grid_plot2$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank()),
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .05, 
            fill = 'white') +
    labs(caption = '2013-03-02') + 
    geom_sf(data=ita_grid_plot3[ita_grid_plot3$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank()),
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .1, 
            fill = 'white') +
    labs(caption = '2015-02-28') + 
    geom_sf(data=ita_grid_plot4[ita_grid_plot3$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank()),
  ggplot() + 
    labs(caption = 'Population density (2011 census)') + 
    geom_sf(data=geostat_grid.sf, aes(fill=cut), colour = NA, show.legend=F) + 
    scale_fill_manual(values = c('white',brewer.pal(9, "YlGn"))) +
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank()),
  ncol = 3)
```


```{r}
pdf(file = "figure/map-ita-pop-act.pdf", width = 10, height = 20)
grid.arrange(
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .05, fill = 'white') +
    labs(caption = '2007-09-15') + 
    geom_sf(data=ita_grid_plot1[ita_grid_plot1$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank(),
                       text = element_text(family = "Palatino")),
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .05, fill = 'white') +
    labs(caption = '2012-11-03') + 
    geom_sf(data=ita_grid_plot2[ita_grid_plot2$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank(),
                       text = element_text(family = "Palatino")),
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .05, fill = 'white') +
    labs(caption = '2013-03-02') + 
    geom_sf(data=ita_grid_plot3[ita_grid_plot3$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank(),
                       text = element_text(family = "Palatino")),
  ggplot() + 
    geom_sf(data=region_2013.sf, colour = 'black', size = .1, fill = 'white') +
    labs(caption = '2015-02-28') + 
    geom_sf(data=ita_grid_plot4[ita_grid_plot3$act == TRUE,], 
            fill='red', colour = NA, show.legend=F) + 
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank(),
                       text = element_text(family = "Palatino")),
  ggplot() + 
    labs(caption = 'Population density (2011 census)') + 
    geom_sf(data=geostat_grid.sf, aes(fill=cut), colour = NA, show.legend=F) + 
    scale_fill_manual(values = c('white',brewer.pal(9, "YlGn"))) +
    theme_bw() + theme(axis.text.x=element_blank(),
                       axis.text.y=element_blank(),
                       axis.ticks=element_blank(),
                       axis.title.x=element_blank(),
                       axis.title.y=element_blank(),
                       text = element_text(family = "Palatino")),
  ncol = 2)
dev.off()
```

### Analysis of recruitment and Post-recruitment networks

To replicate the following chunks: 
```{}
R -e 'setwd("/path/to/directory"); 
source("replication_code_06_measure_geographic_diffusion.R");'
```

```{r, echo = TRUE}
read_chunk('replication_code_07_network_density.R')
```


```{r measure-network-density, eval = FALSE}

```


```{r}
load("replication_file_04_friendship_graph.RData")
load("replication_file_06_recruitment_graph.RData")

region_south_2013.sf <- region_2013.sf %>% filter(COD_REG >= 13)

lonlat_node_df <- data.frame(lon = V(g_recruitment)$lon,
                             lat = V(g_recruitment)$lat)
lonlat_node.sp <- 
  SpatialPoints(lonlat_node_df, 
                proj4string=CRS('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'))
res <- over(lonlat_node.sp, region_2013.sp)

V(g_recruitment)$DEN_REG <- as.character(res$DEN_REG)
V(g_recruitment)$COD_REG <- res$COD_REG
V(g_recruitment)$north_south <- 
  ifelse(V(g_recruitment)$COD_REG < 13, "North", "South")

```

#### Recruitment density

```{r}
indegree_north <-
  igraph::degree(g_recruitment - 
                   V(g_recruitment)[as.Date(V(g_recruitment)$joined_mu) > 
                                      as.Date('2012-10-28') |
                                      V(g_recruitment)$north_south %in% "South"],
                 mode = 'in')
indegree_south <-
  igraph::degree(g_recruitment - 
                   V(g_recruitment)[as.Date(V(g_recruitment)$joined_mu) > 
                                      as.Date('2012-10-28') |
                                      V(g_recruitment)$north_south %in% "North"],
                 mode = 'in')
indegree_test_df <- data.frame(degree = c(indegree_north, indegree_south),
                               where = c(rep('North',length(indegree_north)),
                                         rep('South',length(indegree_south))))

t.test(degree ~ where, data = indegree_test_df, var.equal = FALSE)
```

#### Post-recruitment density

```{r, cache = TRUE}

postrecruitment_el <- as.data.frame(get.edgelist(g))
postrecruitment_el$since <- as.Date(paste0("01 ", E(g)$since), "%d %B %Y")
postrecruitment_el$joined_A <- as.Date(V(g)$joined_mu[match(postrecruitment_el$V1, V(g)$name)])
postrecruitment_el$joined_B <- as.Date(V(g)$joined_mu[match(postrecruitment_el$V2, V(g)$name)])
postrecruitment_el <- subset(postrecruitment_el, since > joined_A & since > joined_B)
g_postrecruitment <- graph_from_data_frame(postrecruitment_el[,1:3], directed = F)
missing_v <- V(g)$name[! V(g)$name %in% V(g_postrecruitment)$name]
g_postrecruitment <- 
  g_postrecruitment %>% add_vertices(nv = length(missing_v), attr = list(name = missing_v))
V(g_postrecruitment)$joined_mu <- V(g)$joined_mu[match(V(g_postrecruitment)$name, V(g)$name)]
V(g_postrecruitment)$COD_REG <- 
  V(g_recruitment)$COD_REG[match(V(g_postrecruitment)$name, V(g_recruitment)$name)]

postrecruiment_density_df <- data.frame()
sequence <- 
  seq(from = min(as.Date(meetup_members_2015$joined)), 
      max(as.Date(meetup_members_2015$joined)), by = 30)
for (i in 1:length(sequence)) {
  
  this_g <- 
    g_postrecruitment - 
    V(g_postrecruitment)[as.Date(V(g_postrecruitment)$joined_mu) >= sequence[i]]
  
  this_g_north <- this_g - V(this_g)[V(this_g)$COD_REG %in% 13:20]
  this_g_south <- this_g - V(this_g)[V(this_g)$COD_REG %in% 1:12]
  
  postrecruiment_density_df <- 
    rbind(postrecruiment_density_df, data.frame(time = sequence[i],
                                                density = c(edge_density(this_g_north), 
                                                            edge_density(this_g_south)),
                                                where = c("North", "South")))
}


degree_north <-
  igraph::degree(g_postrecruitment - 
                   V(g_postrecruitment)[as.Date(V(g_postrecruitment)$joined_mu) > 
                                          as.Date('2012-10-28') |
                                          V(g_postrecruitment)$COD_REG %in% 13:20],
                 mode = 'all')
degree_south <-
  igraph::degree(g_postrecruitment - 
                   V(g_postrecruitment)[as.Date(V(g_postrecruitment)$joined_mu) > 
                                          as.Date('2012-10-28') |
                                          V(g_postrecruitment)$COD_REG %in% 1:12],
                 mode = 'all')
degree_test_df <- data.frame(degree = c(degree_north, degree_south),
                             where = c(rep('North',length(degree_north)),
                                       rep('South',length(degree_south))))

t.test(degree ~ where, data = degree_test_df, var.equal = FALSE)
```


### Figure 8


```{r, fig.width = 8, fig.height = 6}

load("replication_file_09_network_density.RData")

p1 <-
  ggplot(density_cum_dt[density_cum_dt$scope!='all' & 
                          density_cum_dt$time > as.Date("2008-01-01"),],
         aes(x=time, y=density, colour=scope)) +
  geom_line() + 
  geom_vline(xintercept = as.numeric(as.Date(c("2012-05-07", "2013-02-24"))), 
             linetype = 2, alpha = .5) +
  labs(y = NULL, x="Recruitment network density") +
  scale_y_continuous(labels = percent) + guides(colour=FALSE) + theme_bw()

p2 <-
  ggplot(postrecruiment_density_df[postrecruiment_density_df$time > 
                                     as.Date("2008-01-01"),],
         aes(x=time, y=density, colour=where)) +
  geom_line() + 
  geom_vline(xintercept = as.numeric(as.Date(c("2012-05-07", "2013-02-24"))), 
             linetype = 2, alpha = .5) +
  labs(y = NULL, x="Post-recruitment friendship network density") +
  scale_y_continuous(labels = percent) + guides(colour=FALSE) + theme_bw()

density_cum_dt$label <- ifelse(density_cum_dt$scope == "north", "North", "South")

p3 <-
  ggplot(density_cum_dt[density_cum_dt$scope!='all' & 
                          density_cum_dt$time > as.Date("2008-01-01"),],
         aes(x=time, y=n, colour=label)) +
  geom_line() + 
  geom_vline(xintercept = as.numeric(as.Date(c("2012-05-07", "2013-02-24"))), 
             linetype = 2, alpha = .5) +
  labs(y = NULL, x="Number of nodes", colour = NULL) + 
  theme(legend.position = 'bottom') + theme_bw()

egg::ggarrange(p1, p2, p3, ncol = 1)
```

```{r}

p1 <-
  ggplot(density_cum_dt[density_cum_dt$scope!='all' & 
                          density_cum_dt$time > as.Date("2008-01-01"),],
         aes(x=time, y=density, colour=scope)) +
  geom_line() + 
  geom_vline(xintercept = as.numeric(as.Date(c("2012-05-07", "2013-02-24"))), 
             linetype = 2, alpha = .5) +
  labs(y = NULL, x="Recruitment network density") +
  scale_y_continuous(labels = percent) + guides(colour=FALSE) + 
  theme_bw() +
  theme(legend.position = 'bottom', text = element_text(family = "Palatino"))

p2 <-
  ggplot(postrecruiment_density_df[postrecruiment_density_df$time > 
                                     as.Date("2008-01-01"),],
         aes(x=time, y=density, colour=where)) +
  geom_line() + 
  geom_vline(xintercept = as.numeric(as.Date(c("2012-05-07", "2013-02-24"))), 
             linetype = 2, alpha = .5) +
  labs(y = NULL, x="Post-recruitment friendship network density") +
  scale_y_continuous(labels = percent) + guides(colour=FALSE) + 
  theme_bw() + 
  theme(legend.position = 'bottom', text = element_text(family = "Palatino"))

density_cum_dt$label <- ifelse(density_cum_dt$scope == "north", "North", "South")

p3 <-
  ggplot(density_cum_dt[density_cum_dt$scope!='all' & density_cum_dt$time > 
                          as.Date("2008-01-01"),],
         aes(x=time, y=n, colour=label)) +
  geom_line() + 
  geom_vline(xintercept = as.numeric(as.Date(c("2012-05-07", "2013-02-24"))), 
             linetype = 2, alpha = .5) +
  labs(y = NULL, x="Number of nodes", colour = NULL) + 
  theme_bw() + 
  theme(legend.position = 'bottom', text = element_text(family = "Palatino"))

pdf(file = "figure/ts-net-density.pdf", width = 8, height = 6)
egg::ggarrange(p1, p2, p3, ncol = 1)
dev.off()
```

## Exponential Random Graph Models

### Create control variable `pre_active_buff_20km`

```{r, echo = TRUE}
read_chunk('replication_code_08_buffer_density.R')
```


```{r measure-buffer-density, eval = FALSE}

```

### Create control variable `gender`

```{r, echo = TRUE}
read_chunk('replication_code_09_gender_attribution.R')
```


```{r gender-attribution, eval = FALSE}

```

```{r}
load('replication_file_12_meetup_members_2014_gender.RData')
meetup_members_2014$gender <- 
  meetup_members_2014_gender$gender[match(meetup_members_2014$member_id,
                                          meetup_members_2014_gender$member_id)]
prop.table(table(meetup_members_2014$gender, useNA = 'always'))
```

### ERGM

```{r}
load("replication_file_10_net_active_buff_20km.RData")

region_2013.sp$sc_net <-  
  survey_reg_avg$sc_net[match(region_2013.sp$COD_REG, survey_reg_avg$COD_REG)]
region_2013.sp$sc_trust <-  
  survey_reg_avg$sc_trust[match(region_2013.sp$COD_REG, survey_reg_avg$COD_REG)]

g_recruitment_df <- 
  vertexAttributesAsDataFrame(addDegreeToVertices(g_recruitment))
g_recruitment_df$joined_mu_year <- 
  format(as.Date(g_recruitment_df$joined_mu), "%Y")
g_recruitment.sp <- 
  SpatialPoints(g_recruitment_df[,c('lon','lat')], 
                proj4string = 
                  CRS('+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'))

res <- 
  over(g_recruitment.sp, 
       spTransform(region_2013.sp,
                   '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'))
g_recruitment_df <- 
  cbind(g_recruitment_df, 
        res[,c("sc_net", "sc_trust" )])
g_recruitment_df$pre_active_buff_20km <- 
  net_active_buff_20km_df$pre_active_buff_20km[
    match(g_recruitment_df$name, net_active_buff_20km_df$name)]

g_recruitment_df$joined_mu <- 
  as.Date(as.character(g_recruitment_df$joined_mu))
g_recruitment_df$joined_mu_num <- 
  as.numeric(g_recruitment_df$joined_mu)
g_recruitment_df$gender <- 
  meetup_members_2014$gender[match(g_recruitment_df$name, 
                                   meetup_members_2014$facebook)]

## Add to network
V(g_recruitment)$sc_net <- 
  g_recruitment_df$sc_net[match(g_recruitment_df$name, 
                                V(g_recruitment)$name)]
V(g_recruitment)$sc_trust <- 
  g_recruitment_df$sc_trust[match(g_recruitment_df$name, 
                                  V(g_recruitment)$name)]
V(g_recruitment)$pre_active_buff_20km <- 
  g_recruitment_df$pre_active_buff_20km[
    match(g_recruitment_df$name, V(g_recruitment)$name)]
V(g_recruitment)$gender <- 
  g_recruitment_df$gender[match(g_recruitment_df$name, 
                                V(g_recruitment)$name)]

# Exclude nodes out of Italy
net_recruitment <- 
  asNetwork(g_recruitment - 
              V(g_recruitment)[is.na(V(g_recruitment)$COD_REG)])

mod_egrm_recruitment <- 
  ergm(net_recruitment ~ edges +
         nodeicov("pre_active_buff_20km") +
         nodeifactor("north_south", base = 1) +
         nodeifactor("gender", base = 1) +
         nodeofactor("gender", base = 1) +
         nodematch("gender") +
         nodeicov("sc_net") + nodeicov("sc_trust")
  )
net_recruitment
```

```{r}
V(g_postrecruitment)$pre_active_buff_20km <- 
  V(g_recruitment)$pre_active_buff_20km[
    match(V(g_postrecruitment)$name, V(g_recruitment)$name)]
V(g_postrecruitment)$north_south <- 
  V(g_recruitment)$north_south[
    match(V(g_postrecruitment)$name, V(g_recruitment)$name)]
V(g_postrecruitment)$gender <- 
  V(g_recruitment)$gender[
    match(V(g_postrecruitment)$name, V(g_recruitment)$name)]
V(g_postrecruitment)$sc_net <- 
  V(g_recruitment)$sc_net[
    match(V(g_postrecruitment)$name, V(g_recruitment)$name)]
V(g_postrecruitment)$sc_trust <- 
  V(g_recruitment)$sc_trust[
    match(V(g_postrecruitment)$name, V(g_recruitment)$name)]

# Exclude nodes out of Italy
net_postrecruitment <- asNetwork(g_postrecruitment - V(g_postrecruitment)[is.na(V(g_postrecruitment)$COD_REG)])

mod_egrm_postrecruitment <- 
  ergm(net_postrecruitment ~ edges +
         nodecov("pre_active_buff_20km") +
         nodefactor("north_south", base = 1) +
         nodefactor("gender") +
         nodematch("gender") +
         nodecov("sc_net") + nodecov("sc_trust")
  )
net_postrecruitment
save(g_recruitment, g_postrecruitment, file = 'debug_replication.RData')
```

#### Table 3

```{r, results = 'asis'}
texreg(list(mod_egrm_recruitment, mod_egrm_postrecruitment))
```

```{r results = 'hide'}
texreg(list(mod_egrm_recruitment, mod_egrm_postrecruitment), 
       file = "table/ergm-coef.tex", 
       custom.model.names = c("Recruitment network", "Post-recruitment network"))
htmlreg(list(mod_egrm_recruitment, mod_egrm_postrecruitment), 
        file = "table/ergm-coef.html", 
        custom.model.names = c("Recruitment network", "Post-recruitment network"))
```

### Logistic regression predicting at least one recruiting tie

```{r}
g_recruitment_df$joined_mu <- as.Date(g_recruitment_df$joined_mu)
g_recruitment_df$at_least_one_indegree <- g_recruitment_df$indegree > 0

# First phase
prop.table(table(g_recruitment_df[g_recruitment_df$joined_mu < 
                                    as.Date("2012-10-28"), ]$at_least_one_indegree,
                 g_recruitment_df[g_recruitment_df$joined_mu < 
                                    as.Date("2012-10-28"), ]$north_south),
           2)

mean(g_recruitment_df[g_recruitment_df$joined_mu < 
                        as.Date("2012-10-28"), ]$at_least_one_indegree)

# Second phase
prop.table(table(g_recruitment_df[g_recruitment_df$joined_mu >= 
                                    as.Date("2012-10-28") &
                                    g_recruitment_df$joined_mu < 
                                    as.Date("2013-02-24"), ]$at_least_one_indegree,
                 g_recruitment_df[g_recruitment_df$joined_mu >= 
                                    as.Date("2012-10-28") &
                                    g_recruitment_df$joined_mu < 
                                    as.Date("2013-02-24"), ]$north_south),
           2)

mean(g_recruitment_df[g_recruitment_df$joined_mu >= 
                        as.Date("2012-10-28") &
                        g_recruitment_df$joined_mu < 
                        as.Date("2013-02-24"), ]$at_least_one_indegree)

# Third phase
prop.table(table(g_recruitment_df[g_recruitment_df$joined_mu >= 
                                    as.Date("2013-02-24"), ]$at_least_one_indegree,
                 g_recruitment_df[g_recruitment_df$joined_mu >= 
                                    as.Date("2013-02-24"), ]$north_south),
           2)

mean(g_recruitment_df[g_recruitment_df$joined_mu >= 
                        as.Date("2013-02-24"), ]$at_least_one_indegree)

log_mod1 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south + gender + pre_active_buff_20km, 
      data = g_recruitment_df[g_recruitment_df$joined_mu < 
                                as.Date("2012-10-28"), ], 
      family = 'binomial')

log_mod2 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south + gender + pre_active_buff_20km, 
      data = g_recruitment_df[g_recruitment_df$joined_mu >= 
                                as.Date("2012-10-28") &
                                g_recruitment_df$joined_mu < 
                                as.Date("2013-02-24"), ], 
      family = 'binomial')

log_mod3 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south + gender + pre_active_buff_20km, 
      data = g_recruitment_df[g_recruitment_df$joined_mu >= 
                                as.Date("2013-02-24"), ], 
      family = 'binomial')

log_mod0 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south + gender + pre_active_buff_20km, 
      data = g_recruitment_df, 
      family = 'binomial')
```

```{r}

newdata <- 
  data.frame(gender = 
               'male',
             pre_active_buff_20km = mean(g_recruitment_df$pre_active_buff_20km, na.rm = T),
             sc_net = 
               c(mean(g_recruitment_df$sc_net[
                 g_recruitment_df$DEN_REG %in% 'Veneto']), 
                 mean(g_recruitment_df$sc_net[
                   g_recruitment_df$DEN_REG %in% 'Calabria'])),
             sc_trust = 
               c(mean(g_recruitment_df$sc_trust[
                 g_recruitment_df$DEN_REG %in% 'Veneto']), 
                 mean(g_recruitment_df$sc_trust[
                   g_recruitment_df$DEN_REG %in% 'Calabria'])),
             north_south = c("North", "South"))

pr1 <- 
  predict(log_mod1, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr2 <- 
  predict(log_mod2, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr3 <- 
  predict(log_mod3, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr0 <- 
  predict(log_mod0, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr_df <- 
  data.frame(where = rep(c('Veneto (North)','Calabria (South)'), 4),
             time = c(rep("2005-2012", 2), 
                      rep("2012-2013", 2),
                      rep("2013-2015", 2),
                      rep("2005-2015", 2)),
             fit = c(pr1$fit, pr2$fit, pr3$fit, pr0$fit),
             se.fit = c(pr1$se.fit, pr2$se.fit, pr3$se.fit, pr0$se.fit),
             stringsAsFactors = F)

pr_df$time <- 
  factor(pr_df$time, 
         levels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"))

pr_df$panel <- 
  factor(ifelse(pr_df$time == "2005-2015", "whole", "partial"), 
         levels = c("partial","whole"))

ggplot(pr_df, aes(colour = where, y = fit, x = time)) +
  geom_point() +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .3) + 
  theme_bw() + 
  labs(x = NULL, y = "Probability node has 1 recruiting tie or more") + 
  facet_wrap(~panel, scales = "free_x") + scale_y_continuous(labels = percent)
```

### Table 4

```{r, results = 'asis'}
stargazer(log_mod1, log_mod2, log_mod3, log_mod0, 
          column.labels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"))
```

```{r, results = 'hide'}
stargazer(log_mod1, log_mod2, log_mod3, log_mod0,
          column.labels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"),
          type = 'latex', out = 'table/logistic-coef.tex',
          report = "vc*",
          float = FALSE,
          style = 'ajps')
stargazer(log_mod1, log_mod2, log_mod3, log_mod0,
          column.labels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"),
          type = 'html', out = 'table/logistic-coef.html',
          report = "vc*",
          float = FALSE,
          style = 'ajps')
```

#### Only sc\_net varies

```{r}

g_recruitment_df$north_south_num <- as.numeric(g_recruitment_df$north_south)

log_mod1 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south_num + gender + pre_active_buff_20km, 
      data = g_recruitment_df[g_recruitment_df$joined_mu < 
                                as.Date("2012-10-28"), ], 
      family = 'binomial')

log_mod2 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south_num + gender + pre_active_buff_20km, 
      data = g_recruitment_df[g_recruitment_df$joined_mu >= 
                                as.Date("2012-10-28") &
                                g_recruitment_df$joined_mu < 
                                as.Date("2013-02-24"), ], 
      family = 'binomial')

log_mod3 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south_num + gender + pre_active_buff_20km, 
      data = g_recruitment_df[g_recruitment_df$joined_mu >= 
                                as.Date("2013-02-24"), ], 
      family = 'binomial')

log_mod0 <- 
  glm(at_least_one_indegree ~ sc_net + sc_trust + 
        north_south_num + gender + pre_active_buff_20km, 
      data = g_recruitment_df, 
      family = 'binomial')

newdata <- 
  data.frame(gender = 
               'male',
             pre_active_buff_20km = mean(g_recruitment_df$pre_active_buff_20km, na.rm = T),
             sc_net = 
               c(mean(g_recruitment_df$sc_net[
                 g_recruitment_df$DEN_REG %in% 'Veneto']), 
                 mean(g_recruitment_df$sc_net[
                   g_recruitment_df$DEN_REG %in% 'Calabria'])),
             sc_trust = 
               median(g_recruitment_df$sc_trust, na.rm = T),
             north_south_num = 1.5)

pr1 <- 
  predict(log_mod1, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr2 <- 
  predict(log_mod2, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr3 <- 
  predict(log_mod3, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr0 <- 
  predict(log_mod0, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr_df <- 
  data.frame(where = rep(c('High sc_net','Low sc_net'), 4),
             time = c(rep("2005-2012", 2), 
                      rep("2012-2013", 2),
                      rep("2013-2015", 2),
                      rep("2005-2015", 2)),
             fit = c(pr1$fit, pr2$fit, pr3$fit, pr0$fit),
             se.fit = c(pr1$se.fit, pr2$se.fit, pr3$se.fit, pr0$se.fit),
             stringsAsFactors = F)

pr_df$time <- 
  factor(pr_df$time, 
         levels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"))

pr_df$panel <- 
  factor(ifelse(pr_df$time == "2005-2015", "whole", "partial"), 
         levels = c("partial","whole"))

ggplot(pr_df, aes(colour = where, y = fit, x = time)) +
  geom_point() +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .3) + 
  theme_bw() + 
  labs(x = NULL, y = "Probability node has 1 recruiting tie or more", colour = NULL) + 
  facet_wrap(~panel, scales = "free_x") + scale_y_continuous(labels = percent) +
  theme(text = element_text(family = "Palatino"))

pr_df_net <- pr_df
pr_df_net$what <- 'sc_net'
```

#### Only sc\_trust varies

```{r}
newdata <- 
  data.frame(gender = 
               'male',
             pre_active_buff_20km = mean(g_recruitment_df$pre_active_buff_20km, na.rm = T),
             sc_trust = 
               c(mean(g_recruitment_df$sc_trust[
                 g_recruitment_df$DEN_REG %in% 'Veneto']), 
                 mean(g_recruitment_df$sc_trust[
                   g_recruitment_df$DEN_REG %in% 'Calabria'])),
             sc_net = 
               median(g_recruitment_df$sc_net, na.rm = T),
             north_south_num = 1.5)

pr1 <- 
  predict(log_mod1, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr2 <- 
  predict(log_mod2, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr3 <- 
  predict(log_mod3, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr0 <- 
  predict(log_mod0, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr_df <- 
  data.frame(where = rep(c('High sc_trust','Low sc_trust'), 4),
             time = c(rep("2005-2012", 2), 
                      rep("2012-2013", 2),
                      rep("2013-2015", 2),
                      rep("2005-2015", 2)),
             fit = c(pr1$fit, pr2$fit, pr3$fit, pr0$fit),
             se.fit = c(pr1$se.fit, pr2$se.fit, pr3$se.fit, pr0$se.fit),
             stringsAsFactors = F)

pr_df$time <- 
  factor(pr_df$time, 
         levels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"))

pr_df$panel <- 
  factor(ifelse(pr_df$time == "2005-2015", "whole", "partial"), 
         levels = c("partial","whole"))

ggplot(pr_df, aes(colour = where, y = fit, x = time)) +
  geom_point() +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .3) + 
  theme_bw() + 
  labs(x = NULL, y = "Probability node has 1 recruiting tie or more", colour = NULL) + 
  facet_wrap(~panel, scales = "free_x") + scale_y_continuous(labels = percent) +
  theme(text = element_text(family = "Palatino"))

pr_df_trust <- pr_df
pr_df_trust$what <- 'sc_trust'
```


#### Figure 9 

```{r}
pr_df <- rbind(pr_df_net, pr_df_trust)

kable(pr_df, digits = 3)

pr_df$colour <- factor(ifelse(grepl("High", pr_df$where), "High", "Low"), levels = c("High", "Low"))

ggplot(pr_df[pr_df$panel=='partial',], aes(colour = colour, y = fit, x = time)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .1) + 
  theme_bw() + 
  facet_wrap(~what) + 
  scale_shape_manual(values = c(2,6)) +
  scale_y_continuous(labels = percent) +
  guides(colour=guide_legend(ncol=2, title=NULL)) +
  labs(x = NULL, y = "Probability node has 1 recruiting tie or more") + 
  theme(text = element_text(family = "Palatino"))

ggsave("figure/logit-predict-from-sc-net-trust.pdf", width = 8, height = 4,
       ggplot(pr_df[pr_df$panel=='partial',], aes(colour = colour, y = fit, x = time)) +
         geom_point(size = 2) +
         geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .1) + 
         theme_bw() + 
         facet_wrap(~what) + 
         scale_shape_manual(values = c(2,6)) +
         scale_y_continuous(labels = percent) +
         guides(colour=guide_legend(ncol=2, title=NULL)) +
         labs(x = NULL, y = "Probability node has 1 recruiting tie or more") + 
         theme(text = element_text(family = "Palatino")))

```

#### Figure 10

```{r}
newdata <- 
  data.frame(gender = 
               'male',
             pre_active_buff_20km = mean(g_recruitment_df$pre_active_buff_20km, na.rm = T),
             sc_net = 
               c(mean(g_recruitment_df$sc_net[
                 g_recruitment_df$DEN_REG %in% 'Veneto']), 
                 mean(g_recruitment_df$sc_net[
                   g_recruitment_df$DEN_REG %in% 'Calabria'])),
             sc_trust = 
               c(mean(g_recruitment_df$sc_trust[
                 g_recruitment_df$DEN_REG %in% 'Veneto']), 
                 mean(g_recruitment_df$sc_trust[
                   g_recruitment_df$DEN_REG %in% 'Calabria'])),
             north_south_num = 1.5)

pr1 <- 
  predict(log_mod1, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr2 <- 
  predict(log_mod2, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr3 <- 
  predict(log_mod3, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr0 <- 
  predict(log_mod0, 
          newdata = newdata, 
          type="response", se.fit = TRUE)

pr_df <- 
  data.frame(where = rep(c('High Social capital (Veneto)','Low Social capital (Calabria)'), 4),
             time = c(rep("2005-2012", 2), 
                      rep("2012-2013", 2),
                      rep("2013-2015", 2),
                      rep("2005-2015", 2)),
             fit = c(pr1$fit, pr2$fit, pr3$fit, pr0$fit),
             se.fit = c(pr1$se.fit, pr2$se.fit, pr3$se.fit, pr0$se.fit),
             stringsAsFactors = F)

pr_df$time <- 
  factor(pr_df$time, 
         levels = c("2005-2012", "2012-2013", "2013-2015", "2005-2015"))

pr_df$panel <- 
  factor(ifelse(pr_df$time == "2005-2015", "whole", "partial"), 
         levels = c("partial","whole"))

kable(pr_df, digits = 2)

ggplot(pr_df, aes(colour = where, y = fit, x = time)) +
  geom_point() +
  geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .3) + 
  theme_bw() + 
  labs(x = NULL, y = "Probability node has 1 recruiting tie or more", colour = NULL) + 
  facet_grid(.~panel, scales = "free_x", space = "free_x") + 
  scale_y_continuous(labels = percent) +
  theme(text = element_text(family = "Palatino"))

ggsave("figure/logit-predict-from-social-capital.pdf", width = 8, height = 4,
       ggplot(pr_df, aes(colour = where, y = fit, x = time)) +
         geom_point() +
         geom_errorbar(aes(ymin = fit - se.fit, ymax = fit + se.fit), width = .3) + 
         theme_bw() + 
         labs(x = NULL, y = "Probability node has 1 recruiting tie or more", colour = NULL) + 
         facet_grid(.~panel, scales = "free_x", space = "free_x") + 
         scale_y_continuous(labels = percent) +
         theme(text = element_text(family = "Palatino")))

```
